Content of this Lab

  1. Reading and exploring network data

  2. Network descriptives and centrality measures

  3. Elementary transformation: dichotomization, transposition, and symmetrization

  4. Multiplex & bipartite networks

  5. Random networks

Data management and representation of networks

Data handling

# 1) READING AND EXPLORING NETWORK DATA

# Information on the data we will use
#browseURL("https://www.stats.ox.ac.uk/~snijders/siena/s50_data.htm")

edges_t1 <- read.csv("./s50_data/s50-network1.dat", header = FALSE, sep = " ") # adjacency matrix of the student network
edges_t1$V1 <- NULL # remove index

node_sport <- read.csv("./s50_data/s50-sport.dat", header = FALSE, sep = " ") # How much sport people do 
node_sport$V1 <- NULL # remove index
names(node_sport) <- c("t1", "t2", "t3") # set column names
node_sport["node_id"] <- colnames(edges_t1)

node_alc <- read.csv("./s50_data/s50-alcohol.dat", header = FALSE, sep = " ") # How much people do sports 
node_alc$V1 <- NULL # remove index
names(node_alc) <- c("t1", "t2", "t3") # set column names
node_alc["node_id"] <- colnames(edges_t1)
# Data exploration
class(node_sport); class(node_alc); class(edges_t1)
## [1] "data.frame"
## [1] "data.frame"
## [1] "data.frame"
dim(node_sport); dim(node_alc); dim(edges_t1)
## [1] 50  4
## [1] 50  4
## [1] 50 50
head(node_sport) # this is a standard data set, each row represents a person (N = 50)
##   t1 t2 t3 node_id
## 1  2  1  1      V2
## 2  1  1  1      V3
## 3  1  1  1      V4
## 4  2  2  1      V5
## 5  2  2  2      V6
## 6  2  2  2      V7
# For your own analysis it it always good to check for duplicates
# length(unique(nodes$ID)) == nrow(nodes))
colnames(node_sport)
## [1] "t1"      "t2"      "t3"      "node_id"
head(edges_t1) # This is an adjacency matrix (student -> reported friend)
##   V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22
## 1  0  0  0  0  0  0  0  0   0   0   1   0   0   1   0   0   0   0   0   0   0
## 2  0  0  0  0  0  0  1  0   0   0   1   0   0   0   0   0   0   0   0   0   0
## 3  0  0  0  1  0  0  0  0   1   0   0   0   0   0   0   0   0   0   0   0   0
## 4  0  0  1  0  0  0  0  0   1   0   0   0   0   0   0   0   0   0   0   0   0
## 5  0  0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0
## 6  0  0  0  0  0  0  0  1   0   0   0   0   0   0   0   0   0   0   0   0   0
##   V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41
## 1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## 2   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## 3   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## 4   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## 5   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
## 6   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##   V42 V43 V44 V45 V46 V47 V48 V49 V50 V51
## 1   0   0   0   0   0   0   0   0   0   0
## 2   0   0   0   0   0   0   0   0   0   0
## 3   0   0   0   0   0   0   0   0   0   0
## 4   0   0   0   0   0   0   0   0   0   0
## 5   0   0   0   0   0   0   0   0   0   0
## 6   0   0   0   0   0   0   0   0   0   0
colnames(edges_t1)
##  [1] "V2"  "V3"  "V4"  "V5"  "V6"  "V7"  "V8"  "V9"  "V10" "V11" "V12" "V13"
## [13] "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22" "V23" "V24" "V25"
## [25] "V26" "V27" "V28" "V29" "V30" "V31" "V32" "V33" "V34" "V35" "V36" "V37"
## [37] "V38" "V39" "V40" "V41" "V42" "V43" "V44" "V45" "V46" "V47" "V48" "V49"
## [49] "V50" "V51"
# If we had an edge list, I makes sense check whether there are any duplicated ties (any(duplicated(edge_list))
# Conversion to igraph graph
mtx <- as.matrix(edges_t1)
grph <- graph_from_adjacency_matrix(mtx) # igraph functions take matrices as inputs
length(get.edgelist(grph)) # 226 ties
## [1] 226
length(isolates(mtx)) # number of isolates
## [1] 3
# visualise graph
plot.igraph(grph,
            vertex.label='',vertex.size=4,
            edge.color='darkgrey',edge.arrow.size=.2,
            layout=layout_with_kk(grph), # other options: layout_nicely, layout_with_fr (<-- this one is furchterman reingold)
            main='Friendship network')

# Addition of nodes' attributes (amount of sport and alcohol consumption)
# Careful to correctly match the igraph object with attribute data
V(grph)$name[1:10] # This is the order in which nodes appear in the igraph object
##  [1] "V2"  "V3"  "V4"  "V5"  "V6"  "V7"  "V8"  "V9"  "V10" "V11"
node_sport$node_id[1:10] # This - the order in the other element
##  [1] "V2"  "V3"  "V4"  "V5"  "V6"  "V7"  "V8"  "V9"  "V10" "V11"
# Use the match function
node_sport[match(V(grph)$name,node_sport$node_id),]$t1 # usually nodes and edges are not aligned in the same order => make sure you assign the right attributes to the right node 
##  [1] 2 1 1 2 2 2 1 2 2 2 2 2 1 1 2 2 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [39] 2 2 2 1 2 1 2 1 1 2 1 2
node_alc[match(V(grph)$name,node_alc$node_id),]$t1 
##  [1] 3 2 2 2 3 4 4 4 2 4 5 5 3 3 4 4 2 4 3 2 1 3 4 3 3 4 2 2 3 1 4 4 3 2 3 4 2 3
## [39] 2 1 4 4 2 5 2 2 2 2 1 1
V(grph)$sport <- node_sport[match(V(grph)$name,node_sport$node_id),]$t1
V(grph)$alc_con <- node_alc[match(V(grph)$name,node_alc$node_id),]$t1

# Final inspection of the igraph object
grph
## IGRAPH 5735053 DN-- 50 113 -- 
## + attr: name (v/c), sport (v/n), alc_con (v/n)
## + edges from 5735053 (vertex names):
##  [1] V2 ->V12 V2 ->V15 V3 ->V8  V3 ->V12 V4 ->V5  V4 ->V10 V5 ->V4  V5 ->V10
##  [9] V6 ->V33 V7 ->V9  V8 ->V3  V8 ->V43 V8 ->V45 V9 ->V7  V10->V4  V10->V5 
## [17] V11->V12 V11->V16 V11->V34 V12->V3  V12->V16 V12->V17 V13->V8  V13->V43
## [25] V13->V45 V15->V2  V15->V11 V15->V12 V16->V11 V16->V12 V16->V17 V17->V12
## [33] V17->V16 V18->V19 V18->V20 V18->V22 V18->V23 V18->V25 V19->V20 V19->V36
## [41] V20->V12 V20->V25 V20->V27 V20->V31 V22->V23 V23->V18 V23->V22 V23->V32
## [49] V23->V35 V24->V25 V25->V18 V25->V20 V25->V22 V25->V23 V25->V24 V26->V23
## [57] V26->V32 V26->V33 V27->V8  V27->V30 V27->V45 V28->V29 V28->V30 V28->V31
## + ... omitted several edges
# Remember nodes attributes can be visualized
# -- Sport
plot.igraph(grph,
            vertex.label='',vertex.size=4,vertex.color=ifelse(V(grph)$sport == 2,'royalblue','orange'), # 2 means regular sport
            edge.color='darkgrey',edge.arrow.size=.2,
            layout=layout_with_kk(grph),
            main='Friendship network with level of sport \nblue: regular, orange: not regular')

# legend("bottomright", legend = c('Regular','not regular','Unknown'), 
#        title = "Sport", 
#        pch=21, pt.bg=c("royalblue","orange",'white'))


# -- Alcohol consumption
# Create a palette
unique(V(grph)$alc_con)
## [1] 3 2 4 5 1
pal <- c("royalblue", "lightblue", "lightgreen", "orange", "red") # Alcohol: 1 (non), 2 (once or twice a year), 3 (once a month), 4 (once a week) and 5 (more than once a week).

# Get each node's consumption behaviour
nodeCon <- V(grph)$alc_con[!duplicated(V(grph)$name)]

# Assign to the vertices as a color attribute
V(grph)$alc_color <- pal[nodeCon]

plot.igraph(grph,
            vertex.label='', 
            vertex.size=4,
            vertex.color=V(grph)$alc_color, # 2 means regular sport
            edge.color='darkgrey',
            edge.arrow.size=.2,
            layout=layout_with_kk(grph),
            main='Friendship network with alcohol consumption \nblue (non), lightblue (once or twice a year), green (once a month),\norange (once a week) and red (more than once a week)')

#[this piece of code works in the .R file, not in the .rmd one]
# Legend
# legend("bottomright", 
#        legend = c("non", "once or twice a year", "once a month", "once a week", "more than once a week"), 
#        title = "Alcohol consumption", 
#        pch=21, 
#        pt.bg=pal)

Descriptives

# 2) NETWORK DESCRIPTIVES AND CENTRALITY MEASURES

# 2.1) Components
igraph::components(grph)
## $membership
##  V2  V3  V4  V5  V6  V7  V8  V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 
##   1   1   2   2   1   3   1   3   2   1   1   1   4   1   1   1   1   1   1   5 
## V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 
##   1   1   1   1   1   1   1   1   1   1   1   1   1   1   1   6   1   6   1   7 
## V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 
##   6   1   1   1   7   7   7   7   7   8 
## 
## $csize
## [1] 33  3  2  1  1  3  6  1
## 
## $no
## [1] 8
igraph::components(grph)$no # Number of components: 8
## [1] 8
igraph::components(grph)$csize # Remember each isolate is her own component
## [1] 33  3  2  1  1  3  6  1
igraph::components(grph)$csize[igraph::components(grph)$csize > 1] 
## [1] 33  3  2  3  6
sum(igraph::degree(grph) == 0) # isolates
## [1] 3
# 2.2) Density
igraph::edge_density(grph) # you can also report it as a percentage, density is sensitive to size
## [1] 0.04612245
# I personally find always more useful average degree
mean(igraph::degree(grph, mode="in")) # You can use either "in" or "out"
## [1] 2.26
# 2.3) Reciprocity
igraph::reciprocity(grph) # in igraph, reciprocity is edgewise by default
## [1] 0.6902655
# This is not the case in sna => gives the percentage of how many ties are reciprocal 

# 2.4) Transitivity, indicates the clustering, whether there are people that form triangles
igraph::transitivity(grph)
## [1] 0.4229075
# 2.5) Degree distribution
mean(igraph::degree(grph, mode='out'))
## [1] 2.26
# Standard deviations
sd(igraph::degree(grph, mode='out'))
## [1] 1.19198
sd(igraph::degree(grph, mode='in'))
## [1] 1.575449
# Range
range(igraph::degree(grph, mode='out'))
## [1] 0 5
range(igraph::degree(grph, mode='in'))
## [1] 0 8
# centrality measures 
hist(igraph::degree(grph))

hist(igraph::betweenness(grph, directed=FALSE, weights=NA))

hist(igraph::closeness(grph)) # here we get NANs as closeness makes only sense for connected graphs

hist(igraph::harmonic_centrality(grph)) # alternative for disconnected graphs

hist(igraph::page_rank(grph)$vector)

# Let's save degrees
dgr <- data.frame(personid = rep(V(grph)$name,times=2),
                  degree = c(igraph::degree(grph, mode='out'), igraph::degree(grph, mode='in')),
                  type = rep(c('outdegree','indegree'), each=vcount(grph)))
unique(dgr$type)
## [1] "outdegree" "indegree"
head(dgr,10)
##    personid degree      type
## 1        V2      2 outdegree
## 2        V3      2 outdegree
## 3        V4      2 outdegree
## 4        V5      2 outdegree
## 5        V6      1 outdegree
## 6        V7      1 outdegree
## 7        V8      3 outdegree
## 8        V9      1 outdegree
## 9       V10      2 outdegree
## 10      V11      3 outdegree
# Visualization
ggplot(data=dgr) +
  geom_histogram(aes(x=degree), color='black', fill='orange') +
  facet_wrap(~type,scales='free') +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# We can include this information to the igraph and network visualization...
V(grph)$indegree <- degree(grph,mode='in')
grph
## IGRAPH 5735053 DN-- 50 113 -- 
## + attr: name (v/c), sport (v/n), alc_con (v/n), alc_color (v/c),
## | indegree (v/n)
## + edges from 5735053 (vertex names):
##  [1] V2 ->V12 V2 ->V15 V3 ->V8  V3 ->V12 V4 ->V5  V4 ->V10 V5 ->V4  V5 ->V10
##  [9] V6 ->V33 V7 ->V9  V8 ->V3  V8 ->V43 V8 ->V45 V9 ->V7  V10->V4  V10->V5 
## [17] V11->V12 V11->V16 V11->V34 V12->V3  V12->V16 V12->V17 V13->V8  V13->V43
## [25] V13->V45 V15->V2  V15->V11 V15->V12 V16->V11 V16->V12 V16->V17 V17->V12
## [33] V17->V16 V18->V19 V18->V20 V18->V22 V18->V23 V18->V25 V19->V20 V19->V36
## [41] V20->V12 V20->V25 V20->V27 V20->V31 V22->V23 V23->V18 V23->V22 V23->V32
## [49] V23->V35 V24->V25 V25->V18 V25->V20 V25->V22 V25->V23 V25->V24 V26->V23
## + ... omitted several edges
plot.igraph(grph,
            vertex.label='',
            vertex.size=V(grph)$indegree, 
            vertex.color=ifelse(V(grph)$sport == 2,'royalblue','orange'),
            edge.color='darkgrey',
            edge.arrow.size=.2,
            layout=layout_with_kk(grph),
            main='Friendship network, \nnode color indicates amount of sport \nand node size indicated the indegree')

# Other types of centrality that can be obtained are, for example, betweenness, closeness
igraph::betweenness(grph)
##         V2         V3         V4         V5         V6         V7         V8 
##   0.000000  50.833333   0.000000   0.000000   0.000000   0.000000  54.333333 
##         V9        V10        V11        V12        V13        V14        V15 
##   0.000000   0.000000  44.000000 128.833333   0.000000   0.000000   5.000000 
##        V16        V17        V18        V19        V20        V21        V22 
##  43.833333   0.000000 176.000000  38.000000 202.000000   0.000000  70.000000 
##        V23        V24        V25        V26        V27        V28        V29 
## 220.000000   0.000000  67.000000  25.666667  72.000000  12.000000   0.000000 
##        V30        V31        V32        V33        V34        V35        V36 
##  10.666667  79.166667  68.500000  47.500000  47.333333  10.000000   0.000000 
##        V37        V38        V39        V40        V41        V42        V43 
##   0.000000   3.333333   0.000000   0.000000   1.500000   0.000000   0.000000 
##        V44        V45        V46        V47        V48        V49        V50 
##  26.000000  11.000000   1.500000  10.000000   0.000000   0.000000   3.000000 
##        V51 
##   0.000000
# 2.6) Homophily/assortativity
assortativity_degree(grph,directed=TRUE) # Assortativity degree 
## [1] 0.1508808
# Careful, positive assortativity means homophily (contrary to the EI index)
?assortativity

# Homophily by sport
isnar::assort(grph,'sport')
## [1] 0.1716821
isnar::ei(grph,'sport') # EI index
## [1] -0.3274336
# Homophily by alcohol
isnar::assort(grph,'alc_con')
## [1] 0.1162336
isnar::ei(grph,'alc_con') # EI index
## [1] 0.3451327

Exercise 1:

  1. Find out who are the nodes with highest “indegree” and “outdegree”

  2. Can you visualise your network in a way so that node color depict different centrality measures?

  3. Repeat descriptive but for the final year of data collection. Which differences do you observe?

# EXERCISES
# 1) Find out who are the nodes with highest "indegree" and "outdegree"

# person with highest indegree
dgr$personid[max(dgr$degree[dgr$type =="indegree"])]
## [1] "V9"
# person with highest outdegree
dgr$personid[max(dgr$degree[dgr$type =="outdegree"])]
## [1] "V6"
# 2) Can you visualize your network in a way so that node color depict different centrality measures? 

igraph::betweenness(grph) # betweenness centrality 
##         V2         V3         V4         V5         V6         V7         V8 
##   0.000000  50.833333   0.000000   0.000000   0.000000   0.000000  54.333333 
##         V9        V10        V11        V12        V13        V14        V15 
##   0.000000   0.000000  44.000000 128.833333   0.000000   0.000000   5.000000 
##        V16        V17        V18        V19        V20        V21        V22 
##  43.833333   0.000000 176.000000  38.000000 202.000000   0.000000  70.000000 
##        V23        V24        V25        V26        V27        V28        V29 
## 220.000000   0.000000  67.000000  25.666667  72.000000  12.000000   0.000000 
##        V30        V31        V32        V33        V34        V35        V36 
##  10.666667  79.166667  68.500000  47.500000  47.333333  10.000000   0.000000 
##        V37        V38        V39        V40        V41        V42        V43 
##   0.000000   3.333333   0.000000   0.000000   1.500000   0.000000   0.000000 
##        V44        V45        V46        V47        V48        V49        V50 
##  26.000000  11.000000   1.500000  10.000000   0.000000   0.000000   3.000000 
##        V51 
##   0.000000
# Visualisation
#
# Assign centrality score as attribute, example here is betweenness centrality
V(grph)$bcentrality <- igraph::betweenness(grph, directed=FALSE, weights=NA)
V(grph)$dcentrality <- igraph::degree(grph)
V(grph)$ccentrality <- igraph::harmonic_centrality(grph, mode = "all") #alternative to closeness centrality for disconnected graphs
V(grph)$pcentrality <- igraph::page_rank(grph)$vector

#?igraph::harmonic_centrality

hist(V(grph)$bcentrality)

# Normalise values to get a ranking
normalize <- function(x){(x-min(x))/(max(x)-min(x))} # min-max scaling brings everything to the interval [0,1] 
V(grph)$bcentrality_index <- round(normalize(V(grph)$bcentrality)*9)+1 # this adjustment returns values between [1, 10] 
V(grph)$bcentrality_index
##  [1]  1  2  1  1  1  1  2  1  1  1  6  1  1  1  1  1  4  2 10  1  3  6  1  6  1
## [26]  4  2  1  1  4  1  2  1  1  1  1  1  1  1  1  1  1  2  2  1  1  1  1  1  1
# Map 10 colors to the 10 centrality-measure categories
V(grph)$bcolor_centrality <- colorRampPalette(c("steelblue", "gold","darkred"))(10)[V(grph)$bcentrality_index]

# do it for the others, too
V(grph)$ccentrality_index <- round(normalize(V(grph)$ccentrality)*9)+1 # this adjustment returns values between [1, 10] 
V(grph)$ccolor_centrality <- colorRampPalette(c("steelblue", "gold","darkred"))(10)[V(grph)$ccentrality_index] # Map 10 colors to the 10 centrality-measure categories

V(grph)$dcentrality_index <- round(normalize(V(grph)$dcentrality)*9)+1 # this adjustment returns values between [1, 10] 
V(grph)$dcolor_centrality <- colorRampPalette(c("steelblue", "gold","darkred"))(10)[V(grph)$dcentrality_index] # Map 10 colors to the 10 centrality-measure categories

V(grph)$pcentrality_index <- round(normalize(V(grph)$pcentrality)*9)+1 # this adjustment returns values between [1, 10] 
V(grph)$pcolor_centrality <- colorRampPalette(c("steelblue", "gold","darkred"))(10)[V(grph)$pcentrality_index] # Map 10 colors to the 10 centrality-measure categories

#par(mfrow=c(2,2))
set_layout <- layout_with_kk(grph)

# plot
plot(grph,
     vertex.color = V(grph)$dcolor_centrality,
     vertex.label=NA,
     vertex.size=7, 
     main='Degree centrality',
     edge.width = 0.5, 
     edge.arrow.size = 0.2, 
     layout = set_layout)

plot(grph,
     vertex.color = V(grph)$ccolor_centrality,
     vertex.label=NA,
     vertex.size=7, 
     main='Harmonic centrality',
     edge.width = 0.5, 
     edge.arrow.size = 0.2, 
     layout = set_layout)

plot(grph,
     vertex.color = V(grph)$bcolor_centrality,
     vertex.label=NA,
     vertex.size=7, 
     main='Betweenness centrality',
     edge.width = 0.5, 
     edge.arrow.size = 0.2, 
     layout = set_layout)

plot(grph,
     vertex.color = V(grph)$pcolor_centrality,
     vertex.label=NA,
     vertex.size=7, 
     main='Pagerank',
     edge.width = 0.5, 
     edge.arrow.size = 0.2, 
     layout = set_layout)

Pagerank value of a node takes into account (i) the number of links it receives, (ii) the link propensity of the linkers, and (iii) the centrality of the linkers.

# 3) Repeat the descriptive analysis but for the final year of data collection 


# ties for third wave
edges_t3 <- read.csv("./s50_data/s50-network3.dat", header = FALSE, sep = " ") # adjacency matrix of the student network
edges_t3$V1 <- NULL # remove index

V(grph)$sport <- node_sport[match(V(grph)$name,node_sport$node_id),]$t3
V(grph)$alc_con <- node_alc[match(V(grph)$name,node_alc$node_id),]$t3

mtx_t3 <- as.matrix(edges_t3)
grph_t3 <- graph_from_adjacency_matrix(mtx_t3)

plot.igraph(grph_t3,
            vertex.label='',vertex.size=7,
            edge.color='darkgrey',
            edge.arrow.size=.2,
            layout=layout_with_kk(grph), # other options: layout_nicely, layout_with_fr (<-- this one is furchterman reingold)
            main='Friendship network')

plot.igraph(grph_t3,
            vertex.label='',
            vertex.size=7,
            vertex.color=ifelse(V(grph)$sport == 2,'royalblue','orange'), # 2 means regular sport
            edge.color='darkgrey',
            edge.arrow.size=.2,
            layout=layout_with_kk(grph),
            main='Friendship network with level of sport \nblue: regular, orange: not regular')

plot.igraph(grph_t3,
            vertex.label='', 
            vertex.size=7,
            vertex.color=V(grph)$alc_color, # 2 means regular sport
            edge.color='darkgrey',
            edge.arrow.size=.2,
            layout=layout_with_kk(grph),
            main='Friendship network with alcohol consumption \nblue (non), lightblue (once or twice a year), green (once a month),\norange (once a week) and red (more than once a week)')

# 2.1) Components
igraph::components(grph_t3)
## $membership
##  V2  V3  V4  V5  V6  V7  V8  V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 
##   1   2   3   3   3   3   2   4   3   1   1   1   5   1   1   2   3   6   1   7 
## V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34 V35 V36 V37 V38 V39 V40 V41 
##   4   3   2   3   4   2   3   3   1   1   4   3   1   3   4   1   4   1   4   4 
## V42 V43 V44 V45 V46 V47 V48 V49 V50 V51 
##   1   2   4   2   4   4   4   4   4   4 
## 
## $csize
## [1] 13  7 12 15  1  1  1
## 
## $no
## [1] 7
igraph::components(grph_t3)$no # Number of components: 3
## [1] 7
igraph::components(grph_t3)$csize # Remember each isolate is her own component
## [1] 13  7 12 15  1  1  1
sum(igraph::degree(grph_t3) == 0) # isolates
## [1] 3
# 2.2) Density
igraph::edge_density(grph_t3) # density at t1: 0.046
## [1] 0.04979592
mean(igraph::degree(grph_t3, mode="in")) # at t1: 2.26
## [1] 2.44
# 2.3) Reciprocity
igraph::reciprocity(grph_t3) # at t1: 0.69
## [1] 0.7377049
# 2.4) Transitivity, indicates the clustering, whether there are people that form triangles
igraph::transitivity(grph_t3) # at t1: 0.42
## [1] 0.5181818
# 2.5) Degree distribution
mean(igraph::degree(grph_t3, mode='out')) # at t1: 2.26
## [1] 2.44
# Standard deviations
sd(igraph::degree(grph_t3, mode='out'))
## [1] 1.500476
sd(igraph::degree(grph_t3, mode='in'))
## [1] 1.527436
# Range
range(igraph::degree(grph_t3, mode='out'))
## [1] 0 5
range(igraph::degree(grph_t3, mode='in'))
## [1] 0 7

Transformations

# 3) BASIC TRANSFORMATION

# 3.1) Dichotomization (recoding)
# Useful when valued or weighted matrices, but we aim for a simple analysis (logistic regression)
# Image, respondents were asked to assess their relationship with others on a scale 1 to 5

w.mtx <- matrix(c(0,5,2,1,2,
                  4,0,0,3,2,
                  2,2,0,4,3,
                  1,2,5,0,3,
                  1,1,5,4,0),
                nrow=5,ncol=5,byrow=TRUE,
                dimnames = list(c('A','B','C','D','E'),c('A','B','C','D','E')))
w.mtx
##   A B C D E
## A 0 5 2 1 2
## B 4 0 0 3 2
## C 2 2 0 4 3
## D 1 2 5 0 3
## E 1 1 5 4 0
# igraph object
w.grph <- graph.adjacency(w.mtx,weighted = TRUE)
w.grph
## IGRAPH 4e9b977 DNW- 5 19 -- 
## + attr: name (v/c), weight (e/n)
## + edges from 4e9b977 (vertex names):
##  [1] A->B A->C A->D A->E B->A B->D B->E C->A C->B C->D C->E D->A D->B D->C D->E
## [16] E->A E->B E->C E->D
plot.igraph(w.grph) # a fully connected network

plot(w.grph,
     edge.width=E(w.grph)$weight, 
     main="weights visible", 
     edge.curved=c(rep(0.2, 19))
     ) # With some ties stronger than others

# You decide that at least a 4 needs to be given for a tie to exist
# Let's keep only strong ties: 4 OR 5
dich.mtx <- w.mtx
dich.mtx[dich.mtx %in% 0:3] <- 0
dich.mtx[dich.mtx %in% 4:5] <- 1
dich.mtx
##   A B C D E
## A 0 1 0 0 0
## B 1 0 0 0 0
## C 0 0 0 1 0
## D 0 0 1 0 0
## E 0 0 1 1 0
dich.grph <- graph.adjacency(dich.mtx)
dich.grph
## IGRAPH 4f97236 DN-- 5 6 -- 
## + attr: name (v/c)
## + edges from 4f97236 (vertex names):
## [1] A->B B->A C->D D->C E->C E->D
plot(dich.grph,
     main='dichotomized version',  
     edge.curved=c(rep(0.25, ecount(dich.grph)))
     )

# 3.2) Transposition (shifting rows for columns)
# Reversing the direction of the ties
# Useful depending on the data collection procedure
(dich.mtx.t <- t(dich.mtx))
##   A B C D E
## A 0 1 0 0 0
## B 1 0 0 0 0
## C 0 0 0 1 1
## D 0 0 1 0 1
## E 0 0 0 0 0
dich.grph.t <- graph.adjacency(dich.mtx.t)
plot(dich.grph.t,
     main='transposed version')

# 3.3) Symmetrization (from directed to undirected network)
(sym.mtx.weak <- symmetrize(dich.mtx,rule='weak')) # asymmetric ties kept (max-symmetrizing)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0    1    0    0    0
## [2,]    1    0    0    0    0
## [3,]    0    0    0    1    1
## [4,]    0    0    1    0    1
## [5,]    0    0    1    1    0
(sym.mtx.stro <- symmetrize(dich.mtx,rule='strong')) # only mutual ties are kept (min-symmetrizing)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0    1    0    0    0
## [2,]    1    0    0    0    0
## [3,]    0    0    0    1    0
## [4,]    0    0    1    0    0
## [5,]    0    0    0    0    0
# The function removes the nodes' names
dimnames(sym.mtx.weak) <- list(c('A','B','C','D','E'),c('A','B','C','D','E'))
dimnames(sym.mtx.stro) <- list(c('A','B','C','D','E'),c('A','B','C','D','E'))

# Convert into igraph objects
sym.grph.weak <- graph.adjacency(sym.mtx.weak,mode='undirected') # notice the argument: mode="undirected"
sym.grph.stro <- graph.adjacency(sym.mtx.stro,mode='undirected') # notice the argument: mode="undirected"

par(mfrow=c(1,2)) # Two plots side to side
plot.igraph(sym.grph.weak,main='Max-symmetrized')
plot.igraph(sym.grph.stro,main='Min-symmetrized')

par(mfrow=c(1,1))

Bipartite networks

# 4) BIPARTITE AND MULTIPLEX NETWORKS

# 4.1) Bipartite networks (two types of nodes)
bip.mtx <- matrix(data=c(1,0,0,0,
                         1,1,0,0,
                         0,0,1,0,
                         0,1,1,0,
                         1,0,0,1,
                         1,1,0,1,
                         0,0,0,1),
                  nrow=7,ncol=4,byrow = TRUE,
                  dimnames = list(c('Agnes','Birgit','Cecilie','Dan','Elijah','Frank','Gerda'),
                                  c('Spotify','Ikea','Volvo','H&M')))
bip.mtx
##         Spotify Ikea Volvo H&M
## Agnes         1    0     0   0
## Birgit        1    1     0   0
## Cecilie       0    0     1   0
## Dan           0    1     1   0
## Elijah        1    0     0   1
## Frank         1    1     0   1
## Gerda         0    0     0   1
# You can derive two unipartite (weighted) networks using matrix multiplication
bip.mtx %*% t(bip.mtx) 
##         Agnes Birgit Cecilie Dan Elijah Frank Gerda
## Agnes       1      1       0   0      1     1     0
## Birgit      1      2       0   1      1     2     0
## Cecilie     0      0       1   1      0     0     0
## Dan         0      1       1   2      0     1     0
## Elijah      1      1       0   0      2     2     1
## Frank       1      2       0   1      2     3     1
## Gerda       0      0       0   0      1     1     1
t(bip.mtx) %*% bip.mtx
##         Spotify Ikea Volvo H&M
## Spotify       4    2     0   2
## Ikea          2    3     1   1
## Volvo         0    1     2   0
## H&M           2    1     0   3

EXERCISE

  • Notice, that the matrices are symmetic.

What does the diagonal capture? And the values off the diagonal?

For the matrix on people: - The diagonal covers how many companies a person likes. - The off-diagonal elements show in how many company dimensions to people have the same preference, i.e. Frank and Elijah have an interest overlap in terms of two companies.

For the matrix on companies: - The diagonal covers how many persons like a company, i.e. how popular a company is.
- The off-diagonal elements show how many people share interest in two companies.

Multiplex networks

# 4.2) Multiplex network: the special case of signed graphs (positive and negative ties)
# Say values of 1 in w.mtx means "conflictual relationship", we can derive another network (on top of dich.mtx)
neg.mtx <- w.mtx
neg.mtx[neg.mtx %in% 2:5] <- 0
neg.mtx
##   A B C D E
## A 0 0 0 1 0
## B 0 0 0 0 0
## C 0 0 0 0 0
## D 1 0 0 0 0
## E 1 1 0 0 0
# Turn into igraph format
neg.grph <- graph.adjacency(neg.mtx,mode='directed')
plot.igraph(neg.grph,edge.color='red',main='Negative ties')

# We can merge positive and negative ties now
E(neg.grph)$sign <- 'negative'
signed.grph <- graph.union(dich.grph,neg.grph)
signed.grph
## IGRAPH 6fc148f DN-- 5 10 -- 
## + attr: name (v/c), sign (e/c)
## + edges from 6fc148f (vertex names):
##  [1] E->D E->C E->B E->A D->C D->A C->D B->A A->D A->B
# Non-negative ties are filled with NA
E(signed.grph)$sign
##  [1] NA         NA         "negative" "negative" NA         "negative"
##  [7] NA         NA         "negative" NA
E(signed.grph)$sign[is.na(E(signed.grph)$sign)] <- 'positive'

# Visualization
plot(signed.grph,
     edge.color=ifelse(E(signed.grph)$sign == 'negative','red','darkgrey'),
     edge.lty=ifelse(E(signed.grph)$sign == 'negative',2,1), # Dashed vs solid lines
     main='Signed graph')

# legend("bottomright", legend = c('Positive tie','Negative tie'),
#        col=c('darkgrey','red'),lty=c(1,2), 
#        lwd=2, ncol=1)

Exercises on random graphs

Erdős–Rényi model

The simplest version of a random graph is the Erdös-Rényi (ER) model, sometimes called the \(G_{n, p}\) model. In the model, the number of nodes \(n\) and the connection probability \(p\) are pre-defined. Then, the model produces a graph with random connections.

Despite its simplicity, Erdös-Rényi graphs show some characteristics similar to real-world networks. As their mathematics is relatively simple, they have been studied extensively. They can serve as a null model to conduct statistical tests on networks.

Often, ER-graphs a little too simplistic to provide appropriate comparisons for empirical networks. Therefore, more sophisticated models and techniques for statistical hypothesis testing have been developed, which will be covered later.

Exercise 2:

Let’s construct a random graph à la Erdős–Rényi ourselves.

Some conceptual help. We want to create a graph that has n nodes which get randomly connected. Accordingly, we have to decide on a probability that determines whether or not an edge will get to connect two nodes. Let’s choose 40% for the time being.

  1. Start with an empty graph and add n nodes. Then,

  2. Write an algorithm that determines (based on our given probability) whether any two nodes will be connected by an edge or not.

  3. If you can, wrap your code in a function so that you can generate multiple graphs with different parameter choices automatically. Try to include an option that lets you choose whether to generate a directed or undirected graph.

  4. Visualise your graph.

generate_erdos_renyi_func <- function(number_of_nodes, p, directed = TRUE) {
  
  # Return a Erdős–Rényi graph.
  
  # Parameters
  # ----------
  # number_of_nodes : int
  #     The number of nodes
  # p : float
  #     The probability of creating an edge
  # directed : boolean
  #     Whether returned graph is directed or undirected. Default is TRUE.
  # 
  # Notes
  # -----
  # Algorithm of creation: 
  #   1. Create a ring over n nodes. 
  #   2. For each node, connect it with k nearest neighbors (k-1 neighbors if k is odd).
  #   3. Create shortcuts by replacing some edges as follows:
  #       - for each edge u-v in the underlying 'n-ring with k nearest neighbors'
  #         with probability p replace it with a new edge u-w with uniformly
  #         random choice of existing node w.
  # 
  # The random rewiring does not increase the number of edges and the rewired graph
  # is not guaranteed to be connected.
  # 
  # References
  # ----------
  # .. [1] Duncan J. Watts and Steven H. Strogatz,
  #    Collective dynamics of small-world networks,
  #    Nature, 393, pp. 440--442, 1998.

  
  if (directed) {
    G <- make_empty_graph(n = number_of_nodes, directed = TRUE)
    edges <- permutations(number_of_nodes, 2) # for directed graph
  } else {
    G <- make_empty_graph(n = number_of_nodes, directed = FALSE)
    edges <- combinations(number_of_nodes, 2) # for undirected graph 
  }
  
  for (i in seq(length(edges)/2)) {
    if (runif(1) <= p){
      #G <- G + edge(edges[i, 1], edges[i, 2])
      G <- G %>% add_edges(c(edges[i, 1], edges[i, 2]))
    } 
  }
  return(G)
}

set.seed(123) # Set seed for reproducibility
G <- generate_erdos_renyi_func(number_of_nodes = 10, p = 1, directed = FALSE)

# Plot the graph
plot(G, layout = layout.circle, vertex.label = 1:10)

Erdős–Rényi networks are a good starting point in building random graphs. A surprisingly realisitic feature of them is the short average path length. In contrast to real-world networks, however, ER-graphs are characterised by a low clustering coefficient. To overcome such shortcomings, more sophisticated random graph models have been developed. The two most famous examples are the Watts-Strogatz and the Barabási-Albert model.

Both types of networks aim to provide a simple mathematical model that result in more realistic features than ER-graphs. While the Watts-Strogatz model focuses on the observation that many networks show a ‘small-world’ effect, the Barabási-Albert model proposes a preferential link attachment mechanism to create a graph with a scale-free (i.e. power law) degree distribution.

There are plenty of other models in the literature, which aim to generate realistic network properties. Among the most prominent ones is the ‘configuration model’, which is a versatile model to produce graphs with any form of degree distribution. For more details, see for example Newman (2018) Chapter 12 and Latora et al. (2018) Chapter 5.

In the following exercises, we focus on the Watts-Strogatz model and the Barabási-Albert model.

Watts–Strogatz model

Exercise 3

Below you find the construction of a Watts-Strogatz model from scratch. Try to understand the code and describe in your own words what is going on. What is typical for this structure? Which mechanism do you think could potentially guide its emergence?

# Function to create a Watts-Strogatz graph
create_watts_strogatz_graph <- function(n, k, p) {
  
    # Return a Watts-Strogatz small-world graph.
    # 
    # Parameters
    # ----------
    # n : int
    #     The number of nodes
    # k : int
    #     Each node is connected to k nearest neighbors in ring topology
    # p : float
    #     The probability of rewiring each edge
    # 
    # Notes
    # -----
    # Algorithm of creation: 
    #   1. Create a ring over n nodes. 
    #   2. For each node, connect it with k nearest neighbors (k-1 neighbors if k is odd).
    #   3. Create shortcuts by replacing some edges as follows:
    #       - for each edge u-v in the underlying 'n-ring with k nearest neighbors'
    #         with probability p replace it with a new edge u-w with uniformly
    #         random choice of existing node w.
    # 
    # The random rewiring does not increase the number of edges and the rewired graph
    # is not guaranteed to be connected.
    # 
    # References
    # ----------
    # .. [1] Duncan J. Watts and Steven H. Strogatz,
    #    Collective dynamics of small-world networks,
    #    Nature, 393, pp. 440--442, 1998.
  
  
  # Create a ring 
  #g <- graph.lattice(dim = c(1, n), nei = k, directed = FALSE, circular = TRUE)
  
  G <- make_empty_graph(n, directed = FALSE)

  # connect k nearest neighbors
  ls_of_nodes <- seq(n)
  
  edges <- list()
  
  for (j in seq(floor(k/2)) ) {
    # Shift elements
    target_nodes <- c(tail(ls_of_nodes, -j), head(ls_of_nodes, j))
    # create edges
    more_edges <- list()
    more_edges <- cbind(ls_of_nodes, target_nodes)
    edges <- rbind(edges, more_edges)
  }
  
  # add edges ro graph
  G <- add_edges(G, unlist(as.vector(t(edges))))
  
  for (i in seq(ecount(G))) {
    
    u <- edges[i, 1]$ls_of_nodes
    v <- edges[i, 2]$target_nodes
    
    if (runif(1) <= p){
      w <- sample(ls_of_nodes, 1)
      
      # enforce no self-loops or multiple edges
      while ((w == u) | (are.connected(G, u, w))) {
        w <- sample(ls_of_nodes, 1)
      }
        
      if (degree(G, u) >= n-1) {
        break
      }
      
      else {
        G <- delete_edges(G, edge = c(u, v))
        G <- add_edges(G, c(u, w))
      }
    }
    }
  
  return(G)
}
  
set.seed(123) # Set seed for reproducibility
G <- create_watts_strogatz_graph(25, 5, 0.2)

# Plot the graph
plot(G, layout = layout.circle)

Barabási–Albert model

Another really famous random graph is the Barabási–Albert model: It is grown by attaching new nodes each with m edges preferentially to existing nodes with high degree. You can find the documentation of the igraph function here.

G_pa <- sample_pa(30, directed = FALSE)
plot(G_pa, layout = layout.circle)

Exercise 4:

In this exercise, we will start comparing a “real-world” dataset with some of the random graph models that were introduced in this week’s lecture. The dataset contains the citation network of DBLP – a database of different scientific publications (source).

  1. Generate an Erdős-Rényi graph, a Watts-Strogatz model, and a Barabási–Albert model with (approximately) the same number of nodes and edges as observed in the citation network (use your self-written function from above for the Erdős-Rényi graph if you can).

For all four networks,

  1. Plot the probability distribution (PDF) of finding a node with a certain degree over the degrees, and

  2. Plot the complement of a cumulative distribution (CCDF) in log-log scale.

The CCDF displays basically the same information as the probability distribution over degrees, but instead of having the ‘probability of finding a node with its degree equal to k’ on the y-axis, the CCDF shows the ‘probability of finding a node of degree k or higher’. The CCDF is not a scattergram anymore, but a function that comes in handy if you want to fit it. Discuss the salient differences between the observed distributions for PDF and CCDF.

  1. Compare and interpret.
# (1) 

# Function to print information about the graph
get_info <- function(graph) {
  cat("Network Summary:\n")
  cat("Number of nodes: ", vcount(graph), "\n")
  cat("Number of edges: ", ecount(graph), "\n")
  cat("Density: ", round(igraph::graph.density(graph), 5), "\n")
  cat("Average in-degree: ", round(mean(igraph::degree(graph, mode = "in")), 2), "\n")
  cat("Average out-degree: ", round(mean(igraph::degree(graph, mode = "out")), 2), "\n")
}

# Read in the citation network from a CSV file
df_cit <- read.csv("./data/cit-DBLP.csv")
df_cit <- df_cit[, -c(1)]
# Create a directed graph from the data frame
G_cit <- graph_from_data_frame(df_cit, directed = TRUE)
# Set the name of the graph
G_cit$name <- "Citation network"

# Generate Erdős–Rényi graph
G_er <- sample_gnp(n = 12591, p = graph.density(G_cit), directed = TRUE)
G_er$name <- "Erdös-Renyi graph"

# Generate Watts Strogatz graph
G_sm <- sample_smallworld(dim = 1, size = 12591, nei = 5, p = 0.3, loops = FALSE, multiple = FALSE)
G_sm$name <- "Watts-Strogratz graph"

# Generate Barabasi-Albert graph
G_pa <- sample_pa(n = 12591, m = 4, directed = TRUE)
G_pa$name <- "Barabasi-Albert graph"

# Call the function to get information about the citation network
get_info(G_cit)
## Network Summary:
## Number of nodes:  12591 
## Number of edges:  49743 
## Density:  0.00031 
## Average in-degree:  3.95 
## Average out-degree:  3.95
get_info(G_er)
## Network Summary:
## Number of nodes:  12591 
## Number of edges:  49593 
## Density:  0.00031 
## Average in-degree:  3.94 
## Average out-degree:  3.94
get_info(G_sm)
## Network Summary:
## Number of nodes:  12591 
## Number of edges:  62955 
## Density:  0.00079 
## Average in-degree:  10 
## Average out-degree:  10
get_info(G_pa)
## Network Summary:
## Number of nodes:  12591 
## Number of edges:  50354 
## Density:  0.00032 
## Average in-degree:  4 
## Average out-degree:  4
G_er$name
## [1] "Erdös-Renyi graph"
# (2) 

# Function to plot the Probability Density Function (PDF) for graph degrees
plot_PDF <- function(graph) {
  degree_df <- as.data.frame(igraph::degree(graph))
  colnames(degree_df) <- "degree"
  group_totals <- count(degree_df, degree)
  group_totals["share"] <- group_totals$n/vcount(graph)
  #  ggplot(degree_df) +
   # geom_histogram(bins = 20, aes(x = degree, y = after_stat(density))) +#, fill = "grey", color = "black") +
  #  scale_y_log10() + 
   # labs(title = paste0("PDF for the ", graph$name), x = "Degree k", y = "P(k)")
    
    hist(log(degree_df$degree), # histogram
       col="grey", # column color
       border="white",
       prob = TRUE, # show densities instead of frequencies
       xlab = "Degree k",
       ylab = "Density of the logged values", 
       main = paste0("PDF for the ", graph$name))
}

plot_PDF(G_cit)

plot_PDF(G_er)

plot_PDF(G_sm)

plot_PDF(G_pa)

# (3)


plot_CCDF <- function(graph) {
  degree_df <- as.data.frame(igraph::degree(graph))
  colnames(degree_df) <- "degree"
  group_totals <- count(degree_df, degree)
  
  sorted_vals <- sort(unique(igraph::degree(graph)), decreasing = FALSE)
  
  ccdf <- rep(0, length(sorted_vals))
  n <- as.double(length(sorted_vals))
  
  for (i in seq(length(sorted_vals))) {
    ccdf[i] <- sum(degree_df >= sorted_vals[i])/n
  }
  
  ccdf_df <- as.data.frame(cbind(sorted_vals, ccdf))
  
  
  ggplot(ccdf_df, aes(x = sorted_vals, y = ccdf)) +
    geom_line() +
    scale_y_log10() +
    scale_x_log10() + 
    labs(title = paste0("CCDF for the ", graph$name), x = "Degree k", y = "P(k>=x)")
}


plot_CCDF(G_cit)

plot_CCDF(G_er)
## Warning: Transformation introduced infinite values in continuous x-axis

plot_CCDF(G_sm)

plot_CCDF(G_pa)

Exercise 5:

Exercise 5: Comparison of clustering coefficients

  1. Compare the clustering coefficients of all three networks. You can use igraph::transitivity(graph) to calculate them. What do your findings tell you?
  2. Calculate the connection probability p for your graphs.

\(p \frac{n(n-1)}{2} = m\)

with p: connection probability, m: number of edges, n: number of nodes

How does the connection probability compare to the clustering coefficients?

# comparison of clustering coefficients (measure of the degree to which nodes in a graph tend to cluster together)

graphs_ls <- list(G_cit, G_er, G_sm, G_pa)

for (g in graphs_ls) {
  c_coef = igraph::transitivity(g)
  cat(g$name,":", round(c_coef, 4), "\n")
}
## Citation network : 0.062 
## Erdös-Renyi graph : 0.0007 
## Watts-Strogratz graph : 0.0748 
## Barabasi-Albert graph : 0.0029
calculate_connection_probability <- function(g) {
  g <- as.undirected(g, mode = "collapse") # render graph undirected
  m = vcount(g)
  n = ecount(g)
  p = 2*m/(n*(n-1)) # this is for undirected graphs 
  cat(g$name,":", round(p, 5), "\n")
}
for (g in graphs_ls) {
  calculate_connection_probability(g)
}
## Citation network : 0.00001 
## Erdös-Renyi graph : 0.00001 
## Watts-Strogratz graph : 0.00001 
## Barabasi-Albert graph : 0.00001

The amount of clustering is another distinguishing feature in random graph models.

Real-world networks usually have a certain amount of clustering. This might be due to several different reasons. In social friendship networks, for example, ties do not form at random but around common friends (the friends of my friends are my friends), also known as ‘triadic closure’, or shared value sets, a.k.a. homophily. Both these types of link formation in social networks will be discussed more in week 6.

In contrast to empirical networks, the simple ER graph does not have clustering, as links form as random, leading to few triangles. In contrast, the Watts-Strogatz model exhibits excessive clustering, as large parts of the network consist of closely connected triangles. The Barabási-Albert model has more realistic clustering values in this case, but clustering values are usually different from behaviour observed in empirical networks.

The connection probability is the same for all four models. It is, thus, not responsible for the differences in clustering.

Exercise 6:

Generate a series of Erdős-Rényi graphs with different specifications. Set the number of nodes fix to 1000 nodes, and vary p between 0.00025 and 0.0025 in steps of 0.000025. Create a plot showing the size of the largest connected component plotted against p. Can you pinpoint the phase transition?

probs <- list()
sizes <- list()

for (p in seq(0.00025, 0.0025, 0.000025)){
  probs <- append(probs, p)
  G_er <- sample_gnp(n = 1000, p , directed = FALSE)
  size_LCC <- max(igraph::components(G_er)$csize) 
  sizes <- append(sizes, size_LCC/1000)
}

plot(probs, sizes, main = "Visualisation of the phase transition", xlab = "Connection probability p", ylab = "% of nodes in LCC", pch = 16, col = "steelblue")

In a graph \(G_{n, p}\), the presence of a giant component (a component containing more than 50% of all nodes of the whole graph) is dependent on the value of p. If we had no pre-knowledge about the science of networks, our expectation for the plot above would probably have been to see a linear relationship. This, however, is not what we observe: instead of a linear relationship we find that there is a special value of p for which we observe a phase transition: if p is lower than that value there is no giant component; if it is higher we find most nodes belonging to the giant connected component.

In the ER-model, the threshold for all nodes to be connected is \(t(n) = log(n)/n\) (equivalent for values of p).